import csv
import re
import requests

# Scrape the "2022必看热片" section of the site's front page and, for every
# movie linked from it, write the movie title and its download links to
# movie.csv (one value per row, in page order).

url = 'https://www.dy2018.com/'


head = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36 Edg/97.0.1072.69'
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Section of the front page that holds the list of featured movies.
obj = re.compile(r"2022必看热片.*?<ul>(?P<Date>.*?)</ul>", re.S)
# Relative link to each movie's detail page inside that section.
obj2 = re.compile(r"<a href='(?P<Url>.*?)'")
# Movie title on a detail page (the text following the year marker "年").
movename = re.compile(r'<div class="title_all"><h1>.*?年(?P<Name>.*?)</h1></div>', re.S)
# Download links on a detail page.
httpobj = re.compile(
                     r'<div id="downlist"  style="display:none">.*?'
                     r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(?P<Download>.*?)">.*?</a></td>'
                     , re.S)


def _fetch_html(page_url):
    """Download *page_url* with the shared headers and return it decoded as gb2312.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    response = requests.get(page_url, headers=head, timeout=REQUEST_TIMEOUT)
    try:
        response.encoding = 'gb2312'
        return response.text
    finally:
        # Always release the connection, even if decoding fails.
        response.close()


# newline="" lets the csv module control line endings itself.
# errors="replace" keeps a single character that gb2312 cannot encode (for
# example the replacement character requests inserts for undecodable bytes)
# from aborting the whole export.
with open("movie.csv", mode="w", encoding="gb2312", errors="replace", newline="") as f:
    Writer = csv.writer(f)

    DataText = _fetch_html(url)

    # Keep the last matching section, as the original loop did, but start
    # from None so a page without the section is reported explicitly
    # instead of failing later with a NameError.
    dateText = None
    for it in obj.finditer(DataText):
        dateText = it.group("Date").strip()
    if dateText is None:
        raise SystemExit("movie list section not found on " + url)

    # Visit every movie in the list and collect its title and download links.
    for it2 in obj2.finditer(dateText):
        detail_url = url + it2.group("Url").strip("/")
        try:
            httpdataText = _fetch_html(detail_url)
        except requests.RequestException as exc:
            # One unreachable detail page should not discard the rest.
            print("skipped", detail_url, exc)
            continue

        for itname in movename.finditer(httpdataText):
            dic = itname.groupdict()
            Writer.writerow(dic.values())

        for it3 in httpobj.finditer(httpdataText):
            dic = it3.groupdict()
            Writer.writerow(dic.values())

print("OK!")








